install.packages(c(“tidycensus”, “tidyverse”, “sf”, “mapview”)) Please run the above code if you do this map at the first time
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.5.2
library(sf)
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(mapview)
## Warning: package 'mapview' was built under R version 4.5.2
library(dplyr)
Set the Key: Set your key using the census_api_key() function so R can securely access the data. You only need to do this once per device using install = TRUE.
census_api_key("959ba8ec2ff8f8bf41e4cafecc6ec9727219fe63", install = TRUE,overwrite=TRUE)
manhattan_pop <- get_acs(
geography = "tract",
variables = "B01003_001",
state = "NY",
county = "061",
geometry = TRUE,
year = 2023
)|>
dplyr::select(GEOID, geometry)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |========================================= | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
# Ensure consistent Coordinate Reference System (CRS)
manhattan_pop <- st_transform(manhattan_pop, crs = 4326)
# --- A. 2025 Shooting Incident Points ---
shooting25_df =
read_csv("./Data Folder/Shooting_2025.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
distinct(incident_key, .keep_all = TRUE) |>
filter(boro == "MANHATTAN") |>
drop_na(latitude)
## Rows: 769 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): OCCUR_DATE, BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION...
## dbl (5): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, Latitude, Longitude
## num (2): X_COORD_CD, Y_COORD_CD
## time (1): OCCUR_TIME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
shooting25_sf <- st_as_sf(
shooting25_df,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- B. 2025 Broken Street Light Points ---
street_light_data_25 <-
read_csv("./Data Folder/street_light_redu.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
filter(city =="MANHATTAN") |>
filter(
descriptor == "Street Light Out"
) |>
mutate(
date_obj = lubridate::dmy_hms(created_date),
created_year = lubridate::year(date_obj)
) |>
# Filter only for the year 2025
filter(created_year == 2025) |>
filter(latitude != 0, longitude != 0)
## Rows: 194278 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Created_Date, Descriptor, City
## dbl (4): Latitude, Longitude, sas_date_value, data_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
street_light_sf_25 <- st_as_sf(
street_light_data_25,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- C. Calculate 2025 Counts Per Tract ---
lights_joined_25 <- st_join(manhattan_pop, street_light_sf_25)
lights_per_tract_25 <- lights_joined_25 |>
group_by(GEOID) |>
summarize(
N_BROKEN_LIGHTS_2025 = sum(!is.na(descriptor)),
.groups = 'drop'
) |>
st_drop_geometry()
# --- D. Final Map Data for 2025 ---
final_map_2025 <- manhattan_pop |>
left_join(lights_per_tract_25, by = "GEOID") |>
mutate(N_BROKEN_LIGHTS_2025 = replace_na(N_BROKEN_LIGHTS_2025, 0))
# --- E. Map Visualization for 2025 ---
mapview(
final_map_2025,
zcol = "N_BROKEN_LIGHTS_2025",
layer.name = "Broken Street Lights per Tract (2025)"
) +
mapview(
shooting25_sf,
col.regions = "red",
cex = 3,
layer.name = "2025 Shooting Case Location"
)
# --- A. CUMULATIVE Shooting Incident Points ---
shooting11_24_df =
read_csv("./Data Folder/Shooting_Historic.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
distinct(incident_key, .keep_all = TRUE) |>
filter(boro == "MANHATTAN") |>
filter(!incident_key %in% c(279138121, 272105041)) |>
mutate(
date_obj = lubridate::mdy(occur_date),
created_year = lubridate::year(date_obj)
) |>
filter(created_year >= 2020 & created_year <= 2024) |>
drop_na(latitude)
## Rows: 29744 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): OCCUR_DATE, BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION...
## dbl (5): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, Latitude, Longitude
## num (2): X_COORD_CD, Y_COORD_CD
## lgl (1): STATISTICAL_MURDER_FLAG
## time (1): OCCUR_TIME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
shooting11_24_sf_hist <- st_as_sf(
shooting11_24_df,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- B. CUMULATIVE Broken Street Light Points ---
street11_24_light_data_hist <-
read_csv("./Data Folder/street_light_redu.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
filter(city =="MANHATTAN") |>
filter(
descriptor == "Street Light Out"
) |>
mutate(
date_obj = lubridate::dmy_hms(created_date),
created_year = lubridate::year(date_obj)
) |>
filter(created_year >= 2020 & created_year <= 2024) |>
filter(latitude != 0, longitude != 0)
## Rows: 194278 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Created_Date, Descriptor, City
## dbl (4): Latitude, Longitude, sas_date_value, data_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
street11_24_light_sf_hist <- st_as_sf(
street11_24_light_data_hist,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- C. Calculate CUMULATIVE Counts Per Census Tract ---
lights_joined_hist_24 <- st_join(manhattan_pop, street11_24_light_sf_hist)
lights_per_tract_hist_24 <- lights_joined_hist_24 |>
group_by(GEOID) |>
summarize(
N_BROKEN_LIGHTS_CUM_24 = sum(!is.na(descriptor)),
.groups = 'drop'
)
shootings_joined_hist_24 <- st_join(manhattan_pop, shooting11_24_sf_hist)
shootings_per_tract_hist_24 <- shootings_joined_hist_24 |>
group_by(GEOID) |>
summarize(
N_SHOOTINGS_CUM_24 = sum(!is.na(incident_key)),
.groups = 'drop'
)
# --- D. Final Map Data for CUMULATIVE 2020-2024 ---
final_cumulative_data_24 <- manhattan_pop |>
left_join(lights_per_tract_hist_24 |> st_drop_geometry(), by = "GEOID") |>
left_join(shootings_per_tract_hist_24 |> st_drop_geometry(), by = "GEOID") |>
mutate(
N_BROKEN_LIGHTS_CUM_24 = replace_na(N_BROKEN_LIGHTS_CUM_24, 0),
N_SHOOTINGS_CUM_24 = replace_na(N_SHOOTINGS_CUM_24, 0)
)
# --- E. Map Visualization for CUMULATIVE 2020-2024 ---
mapview(
final_cumulative_data_24,
zcol = "N_BROKEN_LIGHTS_CUM_24",
layer.name = "Cumulative Broken Street Lights (2011-2024)"
) +
mapview(
shooting11_24_sf_hist,
col.regions = "red",
cex = 3,
layer.name = "Cumulative Shooting Case Locations (2011-2024)"
)
shooting25_df =
read_csv("Data Folder/Shooting_2025.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
distinct(incident_key, .keep_all = TRUE) |>
filter(boro == "MANHATTAN") |>
drop_na(latitude) |>
mutate(statistical_murder_flag = case_match(statistical_murder_flag,
"N" ~ FALSE,
"Y" ~ TRUE))
## Rows: 769 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (13): OCCUR_DATE, BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION...
## dbl (5): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, Latitude, Longitude
## num (2): X_COORD_CD, Y_COORD_CD
## time (1): OCCUR_TIME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
shooting06_24_df =
read_csv("Data Folder/Shooting_Historic.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
distinct(incident_key, .keep_all = TRUE) |>
filter(boro == "MANHATTAN",
!incident_key %in% c(279138121, 272105041)) |>
drop_na(latitude)
## Rows: 29744 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): OCCUR_DATE, BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION...
## dbl (5): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, Latitude, Longitude
## num (2): X_COORD_CD, Y_COORD_CD
## lgl (1): STATISTICAL_MURDER_FLAG
## time (1): OCCUR_TIME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
merged_shooting_df <- bind_rows(shooting25_df, shooting06_24_df)
# --- A. CUMULATIVE Shooting Incident Points ---
shooting11_25_df =
merged_shooting_df |>
janitor::clean_names() |>
distinct(incident_key, .keep_all = TRUE) |>
filter(boro == "MANHATTAN") |>
filter(!incident_key %in% c(279138121, 272105041)) |>
mutate(
date_obj = lubridate::mdy(occur_date),
created_year = lubridate::year(date_obj)
) |>
# FILTER CHANGE: Filter for the cumulative range (2011 to 2025)
filter(created_year >= 2020 & created_year <= 2025) |>
drop_na(latitude)
shooting11_25_sf_hist <- st_as_sf(
shooting11_25_df,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- B. CUMULATIVE Broken Street Light Points ---
street11_25_light_data_hist <-
read_csv("./Data Folder/street_light_redu.csv", na = c("NA", ".", "")) |>
janitor::clean_names() |>
filter(city =="MANHATTAN") |>
filter(
descriptor == "Street Light Out"
) |>
mutate(
date_obj = lubridate::dmy_hms(created_date),
created_year = lubridate::year(date_obj)
) |>
# FILTER CHANGE: Filter for the cumulative range (2011 to 2025)
filter(created_year >= 2020 & created_year <= 2025) |>
filter(latitude != 0, longitude != 0)
## Rows: 194278 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Created_Date, Descriptor, City
## dbl (4): Latitude, Longitude, sas_date_value, data_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
street11_25_light_sf_hist <- st_as_sf(
street11_25_light_data_hist,
coords = c("longitude", "latitude"),
crs = 4326
)
# --- C. Calculate CUMULATIVE Counts Per Census Tract ---
lights_joined_hist_25 <- st_join(manhattan_pop, street11_25_light_sf_hist)
lights_per_tract_hist_25 <- lights_joined_hist_25 |>
group_by(GEOID) |>
summarize(
N_BROKEN_LIGHTS_CUM_25 = sum(!is.na(descriptor)),
.groups = 'drop'
)
shootings_joined_hist_25 <- st_join(manhattan_pop, shooting11_25_sf_hist)
shootings_per_tract_hist_25 <- shootings_joined_hist_25 |>
group_by(GEOID) |>
summarize(
N_SHOOTINGS_CUM_25 = sum(!is.na(incident_key)),
.groups = 'drop'
)
# --- D. Final Map Data for CUMULATIVE 2020-2025 ---
final_cumulative_data_25 <- manhattan_pop |>
left_join(lights_per_tract_hist_25 |> st_drop_geometry(), by = "GEOID") |>
left_join(shootings_per_tract_hist_25 |> st_drop_geometry(), by = "GEOID") |>
mutate(
N_BROKEN_LIGHTS_CUM_25 = replace_na(N_BROKEN_LIGHTS_CUM_25, 0),
N_SHOOTINGS_CUM_25 = replace_na(N_SHOOTINGS_CUM_25, 0)
)
# --- E. Map Visualization for CUMULATIVE 2020-2025 ---
mapview(
final_cumulative_data_25,
zcol = "N_BROKEN_LIGHTS_CUM_25",
layer.name = "Cumulative Broken Street Lights (2020-2025)"
) +
mapview(
shooting11_25_sf_hist,
col.regions = "red",
cex = 3,
layer.name = "Cumulative Shooting Case Locations (2020-2025)"
)